{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Action3：使用基于邻域的协同过滤（KNNBasic、KNNWithMeans、KNNWithZScore、KNNBaseline 中的任意一种）对 MovieLens 数据集进行评分预测，采用 k 折交叉验证（k=3），输出每一折的 RMSE 和 MAE\n",
    "1. 完成代码（10points）     \n",
    "2. 得出K折，每次的RMSE，MAE结果（10points）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "from surprise import KNNWithMeans,KNNBasic,KNNWithZScore,KNNBaseline\n",
    "from surprise import Dataset, Reader\n",
    "from surprise import accuracy\n",
    "from surprise.model_selection import KFold\n",
    "from surprise.model_selection import cross_validate\n",
    "# Load the ratings file: comma-separated 'user item rating timestamp' columns,\n",
    "# with the first line skipped (skip_lines=1).\n",
    "# The ratings are half-star values (e.g. 3.5); MovieLens uses a 0.5-5.0 scale, so it is\n",
    "# set explicitly. Reader's default scale of (1, 5) would clip predictions below 1.\n",
    "reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1,\n",
    "                rating_scale=(0.5, 5))\n",
    "data = Dataset.load_from_file('./ratings.csv', reader=reader)\n",
    "# Trainset built from every rating (no hold-out); handy for inspecting the data.\n",
    "trainset = data.build_full_trainset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('1', '2', 3.5, '1112486027'),\n",
       " ('1', '29', 3.5, '1112484676'),\n",
       " ('1', '32', 3.5, '1112484819'),\n",
       " ('1', '47', 3.5, '1112484727'),\n",
       " ('1', '50', 3.5, '1112484580'),\n",
       " ('1', '112', 3.5, '1094785740'),\n",
       " ('1', '151', 4.0, '1094785734'),\n",
       " ('1', '223', 4.0, '1112485573'),\n",
       " ('1', '253', 4.0, '1112484940'),\n",
       " ('1', '260', 4.0, '1112484826')]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first 10 parsed ratings: (user id, item id, rating, timestamp) tuples.\n",
    "data.raw_ratings[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# trainset.ur maps each inner user id to a list of (inner item id, rating) tuples.\n",
    "# Show the first few ratings of inner user 0 rather than opening the IPython help pager,\n",
    "# which leaves nothing in the saved notebook.\n",
    "trainset.ur[0][:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# itemCF 计算得分：基于物品相似度预测评分时，只取最相似的 k 个物品（这里 k=50）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Evaluating RMSE, MAE of algorithm KNNBasic on 3 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Mean    Std     \n",
      "RMSE (testset)    0.8959  0.8953  0.8948  0.8953  0.0005  \n",
      "MAE (testset)     0.6894  0.6889  0.6885  0.6889  0.0004  \n",
      "Fit time          25.50   26.54   26.10   26.05   0.43    \n",
      "Test time         130.49  133.27  137.55  133.77  2.91    \n",
      "{'test_rmse': array([0.89591623, 0.89531471, 0.89477905]), 'test_mae': array([0.68944753, 0.6888737 , 0.68848157]), 'fit_time': (25.502801179885864, 26.54126286506653, 26.104525804519653), 'test_time': (130.4885025024414, 133.27252078056335, 137.55458974838257)}\n",
      "490.7415449619293\n"
     ]
    }
   ],
   "source": [
    "# KNNBasic, item-based (user_based=False), with k=50 neighbours.\n",
    "# `verbose` is a constructor argument, not a similarity option, so it is passed directly\n",
    "# as a bool instead of as the string 'True' inside sim_options.\n",
    "algo = KNNBasic(k=50, sim_options={'user_based': False}, verbose=True)\n",
    "# A seeded KFold makes the 3 folds reproducible, so every algorithm in this notebook\n",
    "# that uses the same seed is scored on identical splits.\n",
    "folds = KFold(n_splits=3, random_state=0)\n",
    "start = time.perf_counter()  # monotonic clock, suited to measuring elapsed time\n",
    "# verbose=True prints the per-fold RMSE/MAE table; the returned dict is printed as well.\n",
    "print(cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True))\n",
    "print(time.perf_counter() - start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Mean    Std     \n",
      "RMSE (testset)    0.8549  0.8591  0.8554  0.8565  0.0019  \n",
      "MAE (testset)     0.6538  0.6566  0.6544  0.6549  0.0012  \n",
      "Fit time          25.79   27.09   27.03   26.63   0.60    \n",
      "Test time         135.73  137.02  136.81  136.52  0.56    \n",
      "{'test_rmse': array([0.85492326, 0.85908528, 0.85544832]), 'test_mae': array([0.65378461, 0.65660151, 0.65440594]), 'fit_time': (25.787825107574463, 27.086426258087158, 27.02553915977478), 'test_time': (135.73184251785278, 137.01962399482727, 136.80849146842957)}\n",
      "499.61939215660095\n"
     ]
    }
   ],
   "source": [
    "# KNNWithMeans, item-based (user_based=False), with k=50 neighbours.\n",
    "# `verbose` is a constructor argument, not a similarity option, so it is passed directly\n",
    "# as a bool instead of as the string 'True' inside sim_options.\n",
    "algo = KNNWithMeans(k=50, sim_options={'user_based': False}, verbose=True)\n",
    "# A seeded KFold makes the 3 folds reproducible, so every algorithm in this notebook\n",
    "# that uses the same seed is scored on identical splits.\n",
    "folds = KFold(n_splits=3, random_state=0)\n",
    "start = time.perf_counter()  # monotonic clock, suited to measuring elapsed time\n",
    "# verbose=True prints the per-fold RMSE/MAE table; the returned dict is printed as well.\n",
    "print(cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True))\n",
    "print(time.perf_counter() - start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Evaluating RMSE, MAE of algorithm KNNWithZScore on 3 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Mean    Std     \n",
      "RMSE (testset)    0.8596  0.8556  0.8604  0.8585  0.0021  \n",
      "MAE (testset)     0.6566  0.6535  0.6563  0.6555  0.0014  \n",
      "Fit time          28.74   28.83   28.12   28.56   0.31    \n",
      "Test time         145.31  143.87  143.55  144.25  0.76    \n",
      "{'test_rmse': array([0.85964777, 0.85556223, 0.86043846]), 'test_mae': array([0.6565709 , 0.65351862, 0.65630903]), 'fit_time': (28.738308906555176, 28.827856302261353, 28.120132446289062), 'test_time': (145.30910730361938, 143.87344121932983, 143.55431127548218)}\n",
      "529.4907014369965\n"
     ]
    }
   ],
   "source": [
    "# KNNWithZScore, item-based (user_based=False), with k=50 neighbours.\n",
    "# `verbose` is a constructor argument, not a similarity option, so it is passed directly\n",
    "# as a bool instead of as the string 'True' inside sim_options.\n",
    "algo = KNNWithZScore(k=50, sim_options={'user_based': False}, verbose=True)\n",
    "# A seeded KFold makes the 3 folds reproducible, so every algorithm in this notebook\n",
    "# that uses the same seed is scored on identical splits.\n",
    "folds = KFold(n_splits=3, random_state=0)\n",
    "start = time.perf_counter()  # monotonic clock, suited to measuring elapsed time\n",
    "# verbose=True prints the per-fold RMSE/MAE table; the returned dict is printed as well.\n",
    "print(cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True))\n",
    "print(time.perf_counter() - start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimating biases using als...\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Estimating biases using als...\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Estimating biases using als...\n",
      "Computing the msd similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).\n",
      "\n",
      "                  Fold 1  Fold 2  Fold 3  Mean    Std     \n",
      "RMSE (testset)    0.8506  0.8527  0.8517  0.8517  0.0009  \n",
      "MAE (testset)     0.6512  0.6526  0.6521  0.6520  0.0006  \n",
      "Fit time          32.45   33.96   33.58   33.33   0.64    \n",
      "Test time         147.75  147.55  146.38  147.23  0.61    \n",
      "{'test_rmse': array([0.85064083, 0.85274646, 0.8516604 ]), 'test_mae': array([0.65123488, 0.65261007, 0.65209799]), 'fit_time': (32.44611120223999, 33.958768367767334, 33.575090646743774), 'test_time': (147.75398564338684, 147.554860830307, 146.38243985176086)}\n",
      "552.2534649372101\n"
     ]
    }
   ],
   "source": [
    "# KNNBaseline, item-based (user_based=False), with k=50 neighbours.\n",
    "# `verbose` is a constructor argument, not a similarity option, so it is passed directly\n",
    "# as a bool instead of as the string 'True' inside sim_options.\n",
    "algo = KNNBaseline(k=50, sim_options={'user_based': False}, verbose=True)\n",
    "# A seeded KFold makes the 3 folds reproducible, so every algorithm in this notebook\n",
    "# that uses the same seed is scored on identical splits.\n",
    "folds = KFold(n_splits=3, random_state=0)\n",
    "start = time.perf_counter()  # monotonic clock, suited to measuring elapsed time\n",
    "# verbose=True prints the per-fold RMSE/MAE table; the returned dict is printed as well.\n",
    "print(cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=folds, verbose=True))\n",
    "print(time.perf_counter() - start)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
